 
 
  * Paths on the BCSS server (to be used in Belgium)
  global data     "Z:\STATA files"
  global outputDS "Z:\Figures\Descriptive"
  global export   "Z:\Export"

  * Close any log left open by an earlier run (no error if none is open),
  * then open a fresh log named "main" in the data folder.
  capture log close
  log using "${data}\main", replace
  
  
  ***********************************************
  * Raw Stata files from the BCSS server
  ***********************************************
  * Same values as the globals set at the top of the file.
  global data     "Z:\STATA files"
  global outputDS "Z:\Figures\Descriptive"
  global export   "Z:\Export"

  * One client file per year: stamp the year and keep only rows whose coded
  * KBO number is not the "       ." placeholder.
  forvalues yr = 2010/2020 {
      use "Z:\STATA files\pse_1_pop_lim_klant_`yr'_code.dta", clear
      generate year = `yr'
      keep if gecodeerd_KBO != "       ."
      save "Z:\STATA files\client_`yr'.dta", replace
  }

  * Stack the yearly client files (2010 first, then 2011-2020) into one panel.
  use "Z:\STATA files\client_2010.dta", clear
  foreach yr of numlist 2011/2020 {
      append using "Z:\STATA files\client_`yr'.dta"
  }
  save "Z:\STATA files\client_panel.dta", replace

  * Enterprise file restricted to rows with a non-placeholder KBO number.
  use "$data\pse_1_pop_entreprises_code.dta", clear
  keep if gecodeerd_KBO != "       ."
  save "$data\KBO_client_nonmissing.dta", replace
  
  
 
  
  ****************************************************************
  * Clean other raw data sources
  ****************************************************************
  * For each raw source below: stamp every yearly file with its year, save
  * it under a "_code_date" name, then stack 2010-2020 into one panel file.
  * `sources' holds the file-name stems, `outputs' the matching suffix of the
  * panel file ("panel" + suffix); the two lists are read position by position.
  local sources meld_pow meld_kl   werkgev persoon detach
  local outputs meldpow  meldklant werkgev persoon detach
  local n_sources : word count `sources'

  forvalues i = 1/`n_sources' {
      local src : word `i' of `sources'
      local out : word `i' of `outputs'

      * yearly files with a year stamp
      forvalues y = 2010/2020 {
          use "$data\pse_1_pop_lim_`src'_`y'_code.dta", clear
          generate year = `y'
          save "$data\pse_1_pop_lim_`src'_`y'_code_date.dta", replace
      }

      * stacked panel: 2010 first, then 2011-2020 appended in order
      use "$data\pse_1_pop_lim_`src'_2010_code_date.dta", clear
      forvalues y = 2011/2020 {
          append using "$data\pse_1_pop_lim_`src'_`y'_code_date.dta"
      }
      save "$data\panel`out'.dta", replace
  }

  

   * Data set with the ID of the receiving firm and, per year, its number of
   * posting declarations (decl) and of unique posted workers (worker).
   use "$data\panelmeldklant.dta", clear
   generate decl = 1
   * No duration is computed at this stage: the activity date fields are numeric
   * codes whose leading digits are the year, so their raw difference is not a
   * number of days, and the collapse below would discard it anyway.

   * Step 1: one row per client x worker x year, decl = number of declarations.
   collapse (sum) decl, by(Klant_ID Werknemer_ID year)
   * Step 2: one row per client x year; each remaining row is one distinct worker.
   generate worker = 1
   collapse (sum) worker decl, by(Klant_ID year)
   * Generic Klant_ID ("fake firm") created by BCSS.
   drop if Klant_ID == 4153134529
   save "$data\panelclientwithtreatment.dta", replace

   * Yearly total of posting declarations.
   preserve
   collapse (sum) decl, by(year)
   twoway (connected decl year), graphregion(fcolor(white) lcolor(white)) ///
       ytitle("Posting declarations") xlabel(2010 (1) 2020, angle(vertical)) xtitle("")
   graph export "Z:\Figures\decl_melkl.png", replace
   restore

   * Yearly total of client-level unique posted workers.
   preserve
   collapse (sum) worker, by(year)
   twoway (connected worker year), graphregion(fcolor(white) lcolor(white)) ///
       ytitle("Unique Posted Workers") xlabel(2010 (1) 2020, angle(vertical)) xtitle("")
   graph export "Z:\Figures\worker_melkl.png", replace
   restore
   
    * Client x year panel of total posting duration, merged onto the
    * declaration/worker counts built above.
    use "$data\panelmeldklant.dta", clear
    duplicates drop Klant_ID Werknemer_ID Werkgever_ID year Einddatum_activiteit Begindatum_activiteit, force

    * The activity dates are read as digit strings: characters 1-4 are the year
    * and the two characters starting at position 5 are the month.
    * substr(s, start, length): the third argument is a LENGTH, so the month is
    * substr(s, 5, 2). (substr(s, 5, 6) only works when the code has exactly
    * six digits.)
    tostring Begindatum_activiteit, generate(date_debut)
    tostring Einddatum_activiteit, generate(date_fin)
    generate year_debut  = real(substr(date_debut, 1, 4))
    generate month_debut = real(substr(date_debut, 5, 2))
    generate year_fin    = real(substr(date_fin, 1, 4))
    generate month_fin   = real(substr(date_fin, 5, 2))

    * Dates are handled at month precision: every date is set to the 1st of
    * its month, and 31 days are added to a difference to count the end month.
    generate debutprestation = mdy(month_debut, 1, year_debut)
    generate finprestation   = mdy(month_fin, 1, year_fin)
    format debutprestation finprestation %d

    * First and last month of the declaration year.
    generate debutyear = mdy(1, 1, year)
    generate finyear   = mdy(12, 1, year)
    format debutyear finyear %d

    * Duration within the declaration year.
    generate duration2 = finprestation - debutprestation + 31
    * spell runs past the end of the year: stop at December
    * (a missing end date compares as larger than any date, so it lands here too)
    replace duration2 = finyear - debutprestation + 31 if finyear < finprestation
    * spell started before the year: start in January
    replace duration2 = finprestation - debutyear + 31 if debutyear > debutprestation
    * spell covers the whole year: clip on both sides
    replace duration2 = finyear - debutyear + 31 if finyear < finprestation & debutyear > debutprestation

    collapse (sum) duration2, by(Klant_ID year)
    *generic KlantID="fake firm" created by BCSS
    drop if Klant_ID == 4153134529
    merge 1:1 year Klant_ID using "$data\panelclientwithtreatment.dta"
    drop _merge
    save "$data\panelclientwithtreatmentd.dta", replace
 
 
 *********************************************************************
 * Raw data on the list of clients, by year, in LIMOSA
 *********************************************************************
 * Rebuilds the yearly client files and the stacked client panel; this is
 * the same sequence that is run near the top of the file.

 * One client file per year: stamp the year and keep only rows whose coded
 * KBO number is not the "       ." placeholder.
 forvalues yr = 2010/2020 {
     use "Z:\STATA files\pse_1_pop_lim_klant_`yr'_code.dta", clear
     generate year = `yr'
     keep if gecodeerd_KBO != "       ."
     save "Z:\STATA files\client_`yr'.dta", replace
 }

 * Stack 2010-2020 into one panel.
 use "Z:\STATA files\client_2010.dta", clear
 foreach yr of numlist 2011/2020 {
     append using "Z:\STATA files\client_`yr'.dta"
 }
 save "Z:\STATA files\client_panel.dta", replace
  
  
 **********************************************************************
 * Raw data on Belgian employment (datawarehouse) -- quarterly data
 **********************************************************************
 * I- Create a company-year dataset
 * Employment at the firm level uses the "accounting" measure: number of
 * employees in the last quarter of the year.
 * Info on the list of Belgian clients is merged afterwards to identify
 * posting events.

 * One pass per year 2008-2019. The raw files are named with the two-digit
 * year followed by the quarter, e.g. 084 for 2008 Q4 and 104 for 2010 Q4.
 forvalues yr = 2008/2019 {
     local q4 = substr("`yr'", 3, 2) + "4"

     use "$data\pse_1_pop_rsz_`q4'_code.dta", clear
     append using "$data\pse_3_pop_rsz_`q4'_code.dta"

     * Keep one record per firm x worker. The sort is stable so that ties keep
     * their loaded order (pse_1 rows, then pse_3 rows) and the record kept is
     * the same on every run; a plain sort orders ties arbitrarily.
     sort gecodeerd_KBO gecodeerd_insz, stable
     by gecodeerd_KBO gecodeerd_insz: keep if _n == _N

     generate freq = 1
     generate year = `yr'
     * wage variable restricted to workers with clatra==1
     * NOTE(review): presumably clatra==1 marks blue-collar workers -- confirm
     generate salblue = saljrs if clatra == 1

     * Firm-level file: freq = head count; everything else is averaged.
     * NOTE(review): codnac3, codimp and insemp are averaged like the wage
     * variables; that is only meaningful if they are constant within a
     * firm -- confirm.
     collapse (sum) freq (mean) sal* cotpat* primes_klasse codnac3 codimp insemp, ///
         by(gecodeerd_KBO year gecodeerd_matric)
     save "$data\rsz_`yr'.dta", replace
 }
	
	
	* Stack the yearly firm files (2008-2019) and attach the LIMOSA client list.
	use "$data\rsz_2008.dta", clear
	forvalues yr = 2009/2019 {
		append using "$data\rsz_`yr'.dta"
	}
	merge m:1 gecodeerd_KBO year using "Z:\STATA files\client_panel.dta"
	*drop covid year
	drop if year == 2020
	*firms in limosa but not in datawarehouse scope (or 2020)
	count if _merge == 2
	drop if _merge == 2
	*user=1 when the firm matches a limosa declaration that year
	generate user = 0
	replace user = 1 if _merge == 3

	sort gecodeerd_KBO year
	*cumulative sum of using periods by firm; cum=1 the first time user=1
	bysort gecodeerd_KBO (year): generate cum = sum(user)
	order gecodeerd_KBO year user cum
	*total number of years where the firm is observed "matching" with a limosa declaration
	egen panel_using = total(user), by(gecodeerd_KBO)
	generate tagix = 1
	egen panel_tot = total(tagix), by(gecodeerd_KBO)
	*nb of years using limosa / total number of years observed in the panel
	generate s = panel_using/panel_tot
	*firm-level constant =1 if the firm has used a limosa declaration at least once
	generate treated = 0
	replace treated = 1 if panel_using > 0
	*first year a firm has used a limosa declaration
	generate event = year if user == 1 & cum == 1
	egen event_d = min(event), by(gecodeerd_KBO)
	*control firms have 0 event_d but will be dropped later
	replace event_d = 0 if treated == 0
	*time-to-event variable (missing for never-treated firms)
	generate time = year - event_d if treated == 1
	summarize time
	*shift by 11 so that the index is non-negative
	* (a firm first treated in 2019 is observed back to 2008, i.e. 11 years before treatment)
	generate timed = time + 11
	summarize timed

	*one dummy per event-time value, labelled with the relative year (timed - 11)
	forvalues k = 0/20 {
		generate coeff`k' = 0
		replace coeff`k' = 1 if timed == `k' & treated == 1
		local rel = `k' - 11
		label variable coeff`k' "`rel'"
	}

	*bin the treatment coefficients at the bottom and top of the window so that
	*the effect is constant for t>4 and t<-4; it varies freely from -4 to 4
	generate coeff_up = 0
	* time is missing for never-treated firms and a missing value compares as
	* larger than any number, so the missing case must be excluded explicitly
	replace coeff_up = 1 if time > 4 & !missing(time)

	generate coeff_bottom = 0
	replace coeff_bottom = 1 if time < -4

	*employment is the number of employees observed working (freq=1) in the firm in the last quarter of each year
	generate logemp = log(freq)
	generate logwage = log(saljrs)
	* constant regressor, labelled "-1" below and used in the event-study figures
	generate zero = 1

	*geographic cluster to account for spatial autocorrelation of se's
	egen cluster = group(event_d insemp)
	*balance-sheet data at the firm level, processed by the fiscal administration;
	*allows comparing measures of employment from different sources
	capture drop _merge
	drop if missing(gecodeerd_KBO)
	merge m:1 year gecodeerd_KBO using "Z:\STATA files\pse_1_pop_nbb_code.dta"
	drop if _merge == 2
	label variable zero "-1"
	generate logblue = log(salblue)
	
	*************************************************************
	*FIGURE 6: event-study
	*Focus on "only treated"
	*Focus only on treatment for which 4 years pre treatment is observed
	*Treated after 2013: we observe no treatment in 2010-2013
	*************************************************************
	* Regressors: binned lead (coeff_bottom), relative years -4..-2
	* (coeff7-coeff9), relative years 0..4 (coeff11-coeff15), binned lag
	* (coeff_up). coeff10 (relative year -1) is left out.
	* "zero" is a constant labelled "-1"; presumably it is dropped as collinear
	* and drawn at 0 through coefplot's omit option, to mark the reference
	* year on the plot.

	*Domestic employment
	reghdfe logemp coeff_bottom coeff7-coeff9 zero coeff11-coeff15 coeff_up if treated==1 & event_d>2013, absorb(year##codnac3 gecodeerd_KBO) cl(cluster)
	coefplot, keep(coeff7 coeff8 coeff9 zero coeff11 coeff12 coeff13 coeff14 coeff15) omit vertical ///
	graphregion(fcolor(white) lcolor(white)) ytitle("Log Total Employment") xtitle("Year Since First Posting Use") ///
	ylabel(-0.2 (0.1) 0.2) xline(4.5, lcolor(red)) ciopts(recast(rcap)) lwidth(medthick) yline(0, lpattern(dash) lcolor(gs10))
	graph export "Z:\Figures\logemp_ot_bis.png", replace

	*Domestic wages (the y-axis title names the wage outcome, not employment)
	reghdfe logwage coeff_bottom coeff7-coeff9 zero coeff11-coeff15 coeff_up if treated==1 & event_d>2013, absorb(year##codnac3 gecodeerd_KBO) cl(cluster)
	coefplot, keep(coeff7 coeff8 coeff9 zero coeff11 coeff12 coeff13 coeff14 coeff15) omit vertical ///
	graphregion(fcolor(white) lcolor(white)) ytitle("Log Wage") xtitle("Year Since First Posting Use") ///
	ylabel(-0.2 (0.1) 0.2) xline(4.5, lcolor(red)) ciopts(recast(rcap)) lwidth(medthick) yline(0, lpattern(dash) lcolor(gs10))
	graph export "Z:\Figures\logwage_ot_bis.png", replace
	
	
	*alternative measure of employment: robustness check using the balance sheet employment data
	*from bank of belgium
	generate logemp2       = log(A_VALUE_C_1053)
	generate logmales      = log(A_VALUE_C_1203)
	generate logfemales    = log(A_VALUE_C_1213)

	generate logbluecollar = log(A_VALUE_C_1323)
	generate logemployees  = log(A_VALUE_C_1343)
	generate logwagecost   = log(A_VALUE_C_1023)
	generate logmanagers   = log(A_VALUE_C_1303)

	* Logs are taken here as the names say; the raw level (1053 - 1323) and the
	* raw share (1323 / 1053) are created separately further down as "other"
	* and "share".
	generate logothers = log(A_VALUE_C_1053 - A_VALUE_C_1323)
	generate logshare  = log(A_VALUE_C_1323/A_VALUE_C_1053)
	generate logwage2  = log(A_VALUE_C_1023/A_VALUE_C_1053)

	*Very similar results -- employment measures are similar
	reghdfe logemp2 coeff_bottom coeff7-coeff9 zero coeff11-coeff15 coeff_up if treated==1 & event_d>2013, absorb(year##codnac3 gecodeerd_KBO) cl(cluster)
	label variable zero "-1"
	coefplot, keep(coeff7 coeff8 coeff9 zero coeff11 coeff12 coeff13 coeff14 coeff15) omit vertical ///
	graphregion(fcolor(white) lcolor(white)) ytitle("Log Total Employment") xtitle("Year Since First Posting Use") ///
	ylabel(-0.2 (0.1) 0.2) xline(4.5, lcolor(red)) ciopts(recast(rcap)) lwidth(medthick) yline(0, lpattern(dash) lcolor(gs10))
	graph export "Z:\Figures\logemp2_ot_bis.png", replace

	*heterogeneity: blue collar employment
	reghdfe logbluecollar coeff_bottom coeff7-coeff9 zero coeff11-coeff15 coeff_up if treated==1 & event_d>2013, absorb(year##codnac3 gecodeerd_KBO) cl(cluster)
	estimates store bluecollar
	coefplot, keep(coeff7 coeff8 coeff9 zero coeff11 coeff12 coeff13 coeff14 coeff15) omit vertical ///
	graphregion(fcolor(white) lcolor(white)) ytitle("Log Blue Collar Employment") xtitle("Year Since First Posting Use") ///
	ylabel(-0.2 (0.1) 0.2) xline(4.5, lcolor(red)) ciopts(recast(rcap)) lwidth(medthick) yline(0, lpattern(dash) lcolor(gs10))
	graph export "Z:\Figures\logbluecollar_ot_bis.png", replace
	
	
		
	* Level of balance-sheet employment (1053) net of the item used above as
	* blue-collar employment (1323), then detailed summaries of both for the
	* post-2013 treated sample and for the first posting year.
	generate other = A_VALUE_C_1053 - A_VALUE_C_1323
	foreach v of varlist other A_VALUE_C_1323 {
		summarize `v' if treated==1 & event_d>2013, detail
		summarize `v' if treated==1 & time==0, detail
	}

	*Descriptive statistics for appendix table
	tabulate codnac3
	tabulate codnac3 if treated==1, sort
	tabulate codnac3 if treated==0, sort
	summarize freq if treated==1, detail
	summarize freq if treated==0, detail
	summarize A_VALUE_C_1053 if treated==0, detail
	summarize A_VALUE_C_1053 if treated==1, detail
	summarize saljrs if treated==1, detail
	summarize saljrs if treated==0, detail

	*Importers premium for appendix: treated-vs-control gap within year x sector
	foreach dep in logemp2 logwage logbluecollar logemp {
		reghdfe `dep' i.treated, absorb(year##codnac3) cl(cluster)
	}

	
	* From here on only ever-treated firms remain in memory.
	drop if treated != 1

	*Robustness to using semi and fully dynamic estimation: Figure DL
	* post    = relative years 0..4 plus the binned lag (with the "zero" marker)
	* pre     = binned lead plus relative years -4..-2
	* plotted = coefficients shown in the figure, in display order
	local post    zero coeff11-coeff15 coeff_up
	local pre     coeff_bottom coeff7-coeff9
	local plotted coeff7 coeff8 coeff9 zero coeff11 coeff12 coeff13 coeff14 coeff15

	* semi-dynamic: post-treatment terms only
	reghdfe logemp `post' if treated==1 & event_d>2013, absorb(year##codnac3 gecodeerd_KBO) cl(cluster)
	estimates store SemiDynamic
	* fully dynamic: leads and lags
	reghdfe logemp `pre' `post' if treated==1 & event_d>2013, absorb(year##codnac3 gecodeerd_KBO) cl(cluster)
	estimates store FullyDynamic

	coefplot SemiDynamic FullyDynamic, keep(`plotted') order(`plotted') omit vertical ///
	graphregion(fcolor(white) lcolor(white)) ytitle("Log Total Employment") xtitle("Year Since First Posting Use") ///
	ylabel(-0.2 (0.1) 0.2) xline(4.5, lcolor(red)) ciopts(recast(rcap)) lwidth(medthick) yline(0, lpattern(dash) lcolor(gs10))
	graph export "Z:\Figures\dynamicornot_bis.png", replace

	* Numeric firm identifier for the imputation estimator (and xtset later on).
	encode gecodeerd_KBO, generate(id)
	did_imputation logemp id year event_d if event_d>2013, autosample pretrends(4) horizon(0 1 2 3 4)
	event_plot, default_look graph_opt(xtitle("Years Since First Posting Use") ytitle("Average Effect") ///
	title("Borusyak et al. (2021) imputation estimator", size(small)))
	graph export "Z:\Figures\imputed_bis.png", replace

	* Cohort sizes and number of using years by cohort.
	bysort event_d: count
	bysort event_d: tabulate panel_using
	
	
	
	
	*********************************************************************
	* Attach client-level posting counts/durations and plot exposure.

	* post = 1 from the first posting year onwards
	generate post = 0
	replace post = 1 if treated==1 & time>-1

	capture drop _merge
	merge m:1 Klant_ID year using "$data\panelclientwithtreatmentd.dta"
	tabulate user if _merge==1
	* yearly total of posted workers, computed before the using-only rows
	* (_merge==2) are dropped, so it covers all clients in the using file
	egen test = sum(worker), by(year)
	capture drop if _merge==2

	* share of item 1323 in balance-sheet employment (1053); missing 1323 is set to 0
	replace A_VALUE_C_1323 = 0 if missing(A_VALUE_C_1323)
	generate share  = A_VALUE_C_1323/A_VALUE_C_1053
	generate share2 = share
	* NOTE(review): this line changes nothing as written -- A_VALUE_C_1323 was
	* zero-filled just above, so the condition is never true. Confirm intent.
	replace share = 0 if missing(A_VALUE_C_1323)
	* posted workers relative to balance-sheet employment / to head count
	generate share3 = worker/A_VALUE_C_1053
	generate share4 = worker/freq

	preserve
		replace share3 = 0 if missing(share3)
		* yearly totals, carried through the collapse as (constant) means
		egen totw = sum(worker), by(year)
		egen totf = sum(freq), by(year)
		egen tot  = sum(A_VALUE_C_1053), by(year)
		egen totb = sum(A_VALUE_C_1323), by(year)

		collapse (mean) decl worker freq share* A_VALUE_C_1323 A_VALUE_C_1053 test tot*, by(year)

		generate salt = totw/tot
		generate sb   = totb/tot
		generate sb2  = totb/totf

		twoway (connected salt year if year>2009), ///
		graphregion(fcolor(white) lcolor(white)) ///
		xlabel(2010 (1) 2019) xtitle("") ytitle("Posted Workers/Total Workers at Using Firms")
		graph export "Z:\Figures\pwexposure.png", replace

		twoway (connected share year if year>2007) (connected salt year if year>2009, yaxis(2)), ///
		graphregion(fcolor(white) lcolor(white)) ///
		xlabel(2008 (1) 2019, angle(vertical)) xtitle("") ytitle("Posted Workers/Total Workers at Using Firms", axis(2))  ytitle("Average Share of Blue Collar Workers At Using Firm", axis(1)) legend(off)
		graph export "Z:\Figures\pwexposure_step2.png", replace
	restore
		
	
	
	* Yearly totals at using firms: declarations and unique posted workers,
	* next to "test", the yearly total over all clients computed before the
	* merge was trimmed.
	* NOTE(review): the plots keep year>2010 while the axis is labelled from
	* 2010 -- confirm that leaving out 2010 is intended.
	preserve
	collapse (sum) decl worker (mean) test, by(year)

	twoway (connected worker year if year>2010), graphregion(fcolor(white) lcolor(white)) ///
	ytitle("Unique Posted Workers") xtitle("") xlabel(2010 (1) 2019, angle(vertical))
	graph export "Z:\Figures\uniquepw_usingifrms.png", replace

	twoway (connected decl year if year>2010), graphregion(fcolor(white) lcolor(white)) ///
	ytitle("Unique Posting Declarations") xtitle("") xlabel(2010 (1) 2019, angle(vertical))
	graph export "Z:\Figures\uniquedecl_usingifrms.png", replace

	* share of all posted workers that are at using firms
	generate s = worker/test
	twoway (connected s year if year>2010), graphregion(fcolor(white) lcolor(white)) ///
	ytitle("share") xtitle("") xlabel(2010 (1) 2019, angle(vertical))
	graph export "Z:\Figures\s_usingifrms.png", replace

	* Both series are worker counts, so the axis is titled as a count.
	twoway (connected test year if year>2010) (connected worker year if year>2010), graphregion(fcolor(white) lcolor(white)) ///
	ytitle("Unique Posted Workers") xtitle("") xlabel(2010 (1) 2019, angle(vertical)) ///
	legend(label(1 "All posted workers") label(2 "Posted workers at using firms"))
	graph export "Z:\Figures\usingfirmsvsall.png", replace

	restore
	
	****
	* Number of years a firm uses posting: overall, by first-use cohort,
	* for the post-2013 cohorts, and by sector code.
	summarize panel_using, detail
	bysort event_d: tabulate panel_using
	tabulate panel_using if event_d>2013
	bysort codnac3: tabulate panel_using

	* Same breakdowns for the ratio of using years to observed years.
	generate panelr = panel_using/panel_tot
	summarize panelr, detail
	bysort event_d: summarize panelr
	summarize panelr if event_d>2013, detail
	bysort codnac3: summarize panelr
	*****

	* Declare the firm-year panel structure.
	xtset id year
	
	
	
	*only matched in years where the firm uses posted workers
	drop if _merge==2

	* Posted workers per domestic worker, by year relative to first use.
	preserve
		collapse (sum) worker decl freq, by(time)
		generate s = worker/freq
		twoway (connected s time), graphregion(fcolor(white) lcolor(white)) ///
		xtitle("Year to First Posting Use") ytitle("Unique Posted Workers/Domestic Workers")
		graph export "Z:\Figures\treatmentintensity_all.png", replace
	restore

	summarize worker if time==0 & event_d>2013
	summarize freq if time==0 & event_d>2013

	* Intensity in the first posting year, spread to every year of the firm.
	generate intensity = worker/freq if time==0
	egen inti = min(intensity), by(gecodeerd_KBO)

	* Years without posted workers count as zero in total employment.
	replace worker = 0 if missing(worker)
	generate logall = log(worker+freq)
	
	*All employment (domestic workers+posted workers): Figure 6
	* Same event-study specification for three outcomes; each set of estimates
	* is stored under the name at the same position in `stores'.
	local outcomes logall logemp  logwage
	local stores   all    natives wage
	forvalues i = 1/3 {
		local dep  : word `i' of `outcomes'
		local name : word `i' of `stores'
		reghdfe `dep' coeff_bottom coeff7-coeff9 zero coeff11-coeff15 coeff_up if treated==1 & event_d>2013, absorb(year##codnac3 gecodeerd_KBO) cl(cluster)
		estimates store `name'
	}

	coefplot all natives wage, keep(coeff7 coeff8 coeff9 zero coeff11 coeff12 coeff13 coeff14 coeff15) omit vertical ///
	graphregion(fcolor(white) lcolor(white)) ytitle("Log Employment") xtitle("Year Since First Posting Use") ///
	ylabel(-0.2 (0.2) 0.6) xline(4.5, lcolor(red)) ciopts(recast(rcap)) lwidth(medthick) yline(0, lpattern(dash) lcolor(gs10))
	graph export "Z:\Figures\postedanddomestic.png", replace

	* Save the analysis dataset and close the log opened at the top of the file.
	save "$data\baselinedataset.dta", replace
	log close
